import org.apache.spark.{SparkConf, SparkContext}

/** Small Spark demo: turns an in-memory list of sentences into an RDD with
  * `parallelize`, pairs every sentence with its first word, and prints the pairs.
  */
object ParallelizeRDD {

  /** Program entry point.
    *
    * @param args command-line arguments (not used)
    */
  def main(args: Array[String]): Unit = {
    // Spark configuration. Local mode is only a fallback: a master supplied
    // externally (e.g. `spark-submit --master ...`) is no longer overridden.
    val conf = new SparkConf()
      .setAppName("ParallelizeRDD")
      .setIfMissing("spark.master", "local[*]")

    val sc = new SparkContext(conf)

    // Everything that uses the context runs inside try/finally so the context
    // is stopped even when a job fails.
    try {
      // Create the RDD from a local collection.
      val rdd = sc.parallelize(List(
        "this is a test",
        "How are you",
        "I am fine",
        "can you tell me"
      ))

      // Map each sentence to a (firstWord, sentence) pair.
      val words = rdd.map { x =>
        // headOption instead of (0): a line made only of spaces splits to an
        // empty array, and indexing it would throw.
        val firstWord = x.split(" ").headOption.getOrElse("")
        (firstWord, x)
      }

      // Bring the pairs back to the driver and print one per line.
      words.collect().foreach(println)
    } finally {
      sc.stop()
    }
  }
}
